library(beeswarm)
library(naniar)
library(zoo)

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric
# install.packages("zoo")
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# install.packages("GGally")
# library(sets)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.3     ✓ purrr   0.3.4
✓ tibble  3.0.6     ✓ stringr 1.4.0
✓ tidyr   1.1.2     ✓ forcats 0.5.1
✓ readr   1.4.0     
── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(GGally) # for ggpairs
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(lubridate)

Attaching package: ‘lubridate’

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union
# install.packages("maps")
# library(maps)
load_file <- function(file_path){
  read_csv(file_path)
}

tx_data <- load_file("./../data/COVID-19_cases_TX.csv")

── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
cols(
  county_fips_code = col_character(),
  county_name = col_character(),
  state = col_character(),
  state_fips_code = col_double(),
  date = col_date(format = ""),
  confirmed_cases = col_double(),
  deaths = col_double()
)
global_mobility_report <- load_file("./../data/Global_Mobility_Report.csv")

── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
cols(
  country_region_code = col_character(),
  country_region = col_character(),
  sub_region_1 = col_character(),
  sub_region_2 = col_logical(),
  metro_area = col_logical(),
  iso_3166_2_code = col_character(),
  census_fips_code = col_logical(),
  date = col_date(format = ""),
  retail_and_recreation_percent_change_from_baseline = col_double(),
  grocery_and_pharmacy_percent_change_from_baseline = col_double(),
  parks_percent_change_from_baseline = col_double(),
  transit_stations_percent_change_from_baseline = col_double(),
  workplaces_percent_change_from_baseline = col_double(),
  residential_percent_change_from_baseline = col_double()
)

4199216 parsing failures.
 row        col           expected                  actual                                   file
3036 metro_area 1/0/T/F/TRUE/FALSE Kabul Metropolitan Area './../data/Global_Mobility_Report.csv'
3037 metro_area 1/0/T/F/TRUE/FALSE Kabul Metropolitan Area './../data/Global_Mobility_Report.csv'
3038 metro_area 1/0/T/F/TRUE/FALSE Kabul Metropolitan Area './../data/Global_Mobility_Report.csv'
3039 metro_area 1/0/T/F/TRUE/FALSE Kabul Metropolitan Area './../data/Global_Mobility_Report.csv'
3040 metro_area 1/0/T/F/TRUE/FALSE Kabul Metropolitan Area './../data/Global_Mobility_Report.csv'
.... .......... .................. ....................... ......................................
See problems(...) for more details.
cases_plus_census <- load_file("./../data/COVID-19_cases_plus_census.csv")

── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  county_fips_code = col_character(),
  county_name = col_character(),
  state = col_character(),
  state_fips_code = col_character(),
  date = col_date(format = ""),
  geo_id = col_character(),
  pop_5_years_over = col_logical(),
  speak_only_english_at_home = col_logical(),
  speak_spanish_at_home = col_logical(),
  speak_spanish_at_home_low_english = col_logical(),
  pop_15_and_over = col_logical(),
  pop_never_married = col_logical(),
  pop_now_married = col_logical(),
  pop_separated = col_logical(),
  pop_widowed = col_logical(),
  pop_divorced = col_logical()
)
ℹ Use `spec()` for the full column specifications.
cols_keep <- c("county_fips_code", "confirmed_cases", "deaths", "median_income", "male_pop", "female_pop", "total_pop", "median_age", "worked_at_home")
subset_census <- cases_plus_census[cols_keep]

# cols_keep <- c("date", "retail_and_recreation_percent_change_from_baseline", "grocery_and_pharmacy_percent_change_from_baseline", "parks_percent_change_from_baseline", "transit_stations_percent_change_from_baseline", "workplaces_percent_change_from_baseline", "residential_percent_change_from_baseline")
# subset_mobility <- global_mobility_report[cols_keep]
# glo
# subset_mobility$date <- as.Date(subset_mobility$date, format="%Y-%m-%d")
# global_mobility_report
vis_miss(global_mobility_report, sort_miss = T, warn_large_data= F)

vis_miss(tx_data, sort_miss = T, warn_large_data= F)

vis_miss(subset_census, sort_miss = T, warn_large_data = F)

library(RColorBrewer)
plot_vs_county <- function(df, col_val, percentile=FALSE,
                           fips_title="county_fips_code", banks=6, 
                           legend_title="", graphic_title=""){
  # Subset for speed 
  df <- df[c(fips_title, col_val)]
  
  # Get county data
  gcounty <- ggplot2::map_data("county")
  # USA map data
  gusa <- map_data("state")
  
  if (banks > 9){
    mycolors <- colorRampPalette(brewer.pal(9, "Reds"))(banks)
  }
  
  # Format with subregions
  fipstab <-
      transmute(maps::county.fips, fips, county = sub(":.*", "", polyname)) %>%
      unique() %>%
      separate(county, c("region", "subregion"), sep = ",")
  
  # Combine in desired order (NA for missing)
  gcounty <- left_join(gcounty, fipstab, c("region", "subregion"))


  dis <- df
  dis$rprop <- rank(df[col_val])
  dis$pcls <- cut(100 * percent_rank(df[col_val]), seq(0, 100, len = banks),
                        include.lowest = TRUE)

  # Missing data
  anti_join(gcounty, dis, by = c("fips" = fips_title)) %>%
    select(region, subregion) %>%
    unique()
  gcounty_pop <- left_join(gcounty, dis, by = c("fips" = fips_title))
  fill_vals <- gcounty_pop[col_val]

  # Plot
  if (legend_title == ""){
    legend_title <- col_val
  }

  if (percentile == FALSE){
    # names(gcounty_pop)[names(gcounty_pop) == col_val] <- "col_of_interest"
    plt <- ggplot(gcounty_pop) +
      geom_polygon(aes(long, lat, group = group, fill = get(col_val)),
                   color = "grey", size = 0.1, name="Percent Infected") +
      geom_polygon(aes(long, lat, group = group),
                   fill = NA, data = gusa, color = "lightgrey") +
      coord_map("bonne", parameters = 41.6) + ggthemes::theme_map()+
      scale_fill_gradient2()
       # scale_fill_gradient(low = "white", high = "red", na.value = "grey")
      # scale_fill_gradientn(colours = terrain.colors(10))
  }

  if (percentile == TRUE){
    plt <- ggplot(gcounty_pop) +
      geom_polygon(aes(long, lat, group = group, fill = pcls),
                   color = "grey", size = 0.1) +
      geom_polygon(aes(long, lat, group = group),
                   fill = NA, data = gusa, color = "lightgrey") +
      coord_map("bonne", parameters = 41.6) + ggthemes::theme_map() +
      scale_fill_manual(values = mycolors, na.value = "grey") +
      # scale_fill_brewer(palette = "viridis", na.value = "grey") +
      theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
            legend.background = element_rect(fill = NA), 
            legend.position = "left")
  }
  plt <- plt + labs(fill=legend_title) + ggtitle(graphic_title)
  plt
}
subset_census
subset_census['pct_infected'] <- subset_census['confirmed_cases']/subset_census['total_pop']
subset_census['pct_deaths'] <- subset_census['deaths']/subset_census['total_pop']
subset_census$county_fips_code <-as.integer(subset_census$county_fips_code)
subset_census
plot_vs_county(subset_census, "pct_infected", legend_title = "Percent Infected")
Ignoring unknown parameters: name

plot_vs_county(subset_census, "pct_infected", percentile = TRUE, banks=11, 
               legend_title = "Percentile Infected",
               graphic_title = "Percentile of Percentage of People Infected by County")

plot_vs_county(subset_census, "pct_deaths", percentile = TRUE, banks=11, 
               legend_title = "Percentile Deaths",
               graphic_title = "Percentile of Percentage of Deaths by County")

census_corr_cols <- c("deaths", "confirmed_cases", "median_income", "male_pop",
                      "female_pop", "total_pop", "median_age", "worked_at_home")
ggcorr(subset_census[census_corr_cols], low="red", mid="grey", high="blue", hjust= .75, size=3, 
       label = TRUE, label_size = 3, label_color = "white") + ggplot2::labs(title = "Pearson Correlation of Important Variables")

global_mobility_report
country_date_pct_change <- global_mobility_report %>% select(country_region_code
                                                             | contains("date") 
                                                             | contains("percent"))
country_date_pct_change
coi_downsampled <- country_date_pct_change %>% filter(country_region_code %in% 
                                                        c("US", "CA", "NZ", "AE", "CN", "DE", "JP")) %>% 
  filter(weekdays(date) == "Monday") %>% group_by(country_region_code, date) %>% summarise_all(mean, na.rm = T) %>% arrange(country_region_code, date)
coi_downsampled
interested_cols <- c("retail_and_recreation_percent_change_from_baseline",
                     "grocery_and_pharmacy_percent_change_from_baseline",
                     "parks_percent_change_from_baseline",
                     "transit_stations_percent_change_from_baseline",
                     "workplaces_percent_change_from_baseline",
                     "residential_percent_change_from_baseline")

col_labels <- c("Average Retail and Recreation Percent Change from Baseline",
                     "Average Grocery and Pharmacy Percent Change from Baseline",
                     "Average Parks Percent Change from Baseline",
                     "Average Transit Stations Percent Change from Baseline",
                     "Average Workplaces Percent Change from Baseline",
                     "Average Residential Percent Change from Baseline")


for (i in 1:length(col_labels)){
  print(i)
  plt <- ggplot(coi_downsampled,
       aes(x=date, y=get(interested_cols[i]), group=country_region_code,
                            color=country_region_code))+
  geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
  geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
  # geom_point(size=.5)+geom_line()+
  labs(y = col_labels[i], x = "Date", color = "Country")+
  theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
        axis.title.y = element_text(size = 8))
  save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
  ggsave(save_path)
  print(plt)
}
[1] 1
Saving 7 x 7 in image
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6

# ggplot(coi_downsampled,
#        aes(x=date, y=grocery_and_pharmacy_percent_change_from_baseline, group=country_region_code,
#                             color=country_region_code))+
#   geom_point(aes(y=rollmean(retail_and_recreation_percent_change_from_baseline, k=10, na.pad=TRUE)), size=.5)+
#   geom_line(aes(y=rollmean(retail_and_recreation_percent_change_from_baseline, k=10, na.pad=TRUE)))+
#   # geom_point(size=.5)+geom_line()+
#   labs(y = "Average Grocery and Pharmacy Percent Change from Baseline", x = "Date", color = "Country")+
#   theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
#         axis.title.y = element_text(size = 8))
#   # geom_line()
tx_county_data <- tx_data %>% filter(county_name != "Statewide Unallocated")
tx_total_state_data <- tx_data %>% filter(county_name == "Statewide Unallocated")
tx_total_state_data$cumulative_deaths <- cumsum(tx_total_state_data$deaths)
tx_total_state_data$cumulative_cases <- cumsum(tx_total_state_data$confirmed_cases)
tx_county_data
tx_total_state_data
interested_cols <- c("confirmed_cases", "deaths", "cumulative_deaths", "cumulative_cases")
col_labels <- c("Confirmed Cases Per Day", "Deaths Per Day", "Total Deaths", "Total Cases")

for (i in 1:length(col_labels)){
  print(i)
  plt <- ggplot(tx_total_state_data,
       aes(x=date, y=get(interested_cols[i])))+
  # geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
  # geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
  # geom_point(size=.5)+geom_line()+
    geom_line()+
  labs(y = col_labels[i], x = "Date")
  # theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
  #       axis.title.y = element_text(size = 8))
  # save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
  # ggsave(save_path)
  print(plt)
}
[1] 1
[1] 2
[1] 3
[1] 4

Since the above doesn’t really make sense (only 78 confirmed cases with over a thousand deaths), I am going to analyze on a per county basis and maybe that data will be more clear.

tx_by_day_based_on_county <- tx_county_data %>% 
  select(date, confirmed_cases, deaths) %>%
  group_by(date) %>% 
  summarise_all(sum, na.rm = T) %>%
  arrange(date)

interested_cols <- c("confirmed_cases", "deaths")
col_labels <- c("Total Cases", "Total Deaths")

for (i in 1:length(col_labels)){
  print(i)
  plt <- ggplot(tx_by_day_based_on_county,
       aes(x=date, y=get(interested_cols[i])))+
  # geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
  # geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
  geom_point(size=.5)+geom_line()+
    # geom_line()+
  labs(y = col_labels[i], x = "Date")
  # theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
  #       axis.title.y = element_text(size = 8))
  # save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
  # ggsave(save_path)
  print(plt)
}
[1] 1
[1] 2

tx_by_day_based_on_county
library(scales)
# tx_by_day_based_on_county %>% pivot_longer(!date, names_to = "type", values_to = "count")
ggplot(tx_by_day_based_on_county %>% pivot_longer(!date, names_to = "type", values_to = "count"),
       aes(x=date, y=count, group=type, color = type))+
         geom_line()+
  scale_y_continuous(trans = "log10",
                     labels = trans_breaks('log10', math_format(10^.x)))+
  scale_color_manual(labels = c("Confirmed Cases", "Deaths"), values = c("confirmed_cases"="blue",
                                                                         "deaths"="red"))+
  labs(y = "Total Persons", x = "Date", color = "")+
  theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)))+
  ggtitle("Texas COVID-19 Cases")
  ggsave("./../imgs/texas_covid_cases_total.png")
Saving 7.29 x 4.51 in image

  # scale_y_log10()
         # labs(y = "Total Persons", x = "Date"))

# )
# 
# plt <- ggplot(coi_downsampled,
#        aes(x=date, y=get(interested_cols[i]), group=country_region_code,
#                             color=country_region_code))+
#   geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
#   geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
#   # geom_point(size=.5)+geom_line()+
#   labs(y = col_labels[i], x = "Date", color = "Country")+
#   theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
#         axis.title.y = element_text(size = 8))
#   save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
#   ggsave(save_path)
#   print(plt)
tx_county_data
tx_state <- map_data("state") %>% subset(region == "texas")
tx_county_map_data <- map_data("county") %>% subset(region == "texas")

tx_state
tx_county_map_data

# Lowercase
tx_county_data$county_name <- tolower(tx_county_data$county_name)

# Remove ' county'
tx_county_data$county_name <- sub("\\s*county\\b.*", "", tx_county_data$county_name)
tx_county_data

# Join the data with state geo info
tx_geo_data <- left_join(tx_county_map_data, tx_county_data, by = c("subregion" = "county_name"))
tx_geo_data
tx_geo_data$county_fips_code <- as.integer(tx_geo_data$county_fips_code)
plot_vs_county(tx_geo_data, "confirmed_cases")
Ignoring unknown parameters: name

# dallas_data <- tx_county_data %>% filter(county_name == "dallas") 
dallas_data <- tx_county_data %>% filter(county_name == "dallas") %>% 
  select(date, confirmed_cases, deaths)

ggplot(dallas_data %>% pivot_longer(!date, names_to = "type", values_to = "count"),
       aes(x=date, y=count, group=type, color = type))+
         geom_line()+
  scale_y_continuous(trans = "log10", labels = trans_breaks("log10", math_format(10^.x)))+
  scale_y_log10(breaks=c(100, 300, 500, 1000, 3000, 5000, 10000, 30000, 50000, 100000, 300000),
                labels=c('100', '300', '500', '1000', '3000', '5000', '10000', '30000', '50000', '100000', '300000'))+
  scale_color_manual(labels = c("Confirmed Cases", "Deaths"), values = c("confirmed_cases"="blue",
                                                                         "deaths"="red"))+
  labs(y = "Total Persons", x = "Date", color = "")+
  theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)))+
  ggtitle("Dallas COVID-19 Cases")
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
  ggsave("./../imgs/dallas_covid_cases_total.png")
Saving 7.29 x 4.51 in image

subset_census
---
title: "Cleaned Notebook"
output: html_notebook
---

```{r}
library(beeswarm)
library(naniar)
library(zoo)
# install.packages("zoo")
library(janitor)
library(dplyr)
# install.packages("GGally")
# library(sets)
library(tidyverse)
library(ggplot2)
library(GGally) # for ggpairs
library(lubridate)
# install.packages("maps")
# library(maps)
```

```{r}
load_file <- function(file_path){
  read_csv(file_path)
}

tx_data <- load_file("./../data/COVID-19_cases_TX.csv")
global_mobility_report <- load_file("./../data/Global_Mobility_Report.csv")
cases_plus_census <- load_file("./../data/COVID-19_cases_plus_census.csv")
```
```{r}
cols_keep <- c("county_fips_code", "confirmed_cases", "deaths", "median_income", "male_pop", "female_pop", "total_pop", "median_age", "worked_at_home")
subset_census <- cases_plus_census[cols_keep]

# cols_keep <- c("date", "retail_and_recreation_percent_change_from_baseline", "grocery_and_pharmacy_percent_change_from_baseline", "parks_percent_change_from_baseline", "transit_stations_percent_change_from_baseline", "workplaces_percent_change_from_baseline", "residential_percent_change_from_baseline")
# subset_mobility <- global_mobility_report[cols_keep]
# glo
# subset_mobility$date <- as.Date(subset_mobility$date, format="%Y-%m-%d")
```

```{r}
# global_mobility_report
vis_miss(global_mobility_report, sort_miss = T, warn_large_data= F)
```


```{r}
vis_miss(tx_data, sort_miss = T, warn_large_data= F)
vis_miss(subset_census, sort_miss = T, warn_large_data = F)
```

```{r}
library(RColorBrewer)
plot_vs_county <- function(df, col_val, percentile=FALSE,
                           fips_title="county_fips_code", banks=6, 
                           legend_title="", graphic_title=""){
  # Subset for speed 
  df <- df[c(fips_title, col_val)]
  
  # Get county data
  gcounty <- ggplot2::map_data("county")
  # USA map data
  gusa <- map_data("state")
  
  if (banks > 9){
    mycolors <- colorRampPalette(brewer.pal(9, "Reds"))(banks)
  }
  
  # Format with subregions
  fipstab <-
      transmute(maps::county.fips, fips, county = sub(":.*", "", polyname)) %>%
      unique() %>%
      separate(county, c("region", "subregion"), sep = ",")
  
  # Combine in desired order (NA for missing)
  gcounty <- left_join(gcounty, fipstab, c("region", "subregion"))


  dis <- df
  dis$rprop <- rank(df[col_val])
  dis$pcls <- cut(100 * percent_rank(df[col_val]), seq(0, 100, len = banks),
                        include.lowest = TRUE)

  # Missing data
  anti_join(gcounty, dis, by = c("fips" = fips_title)) %>%
    select(region, subregion) %>%
    unique()
  gcounty_pop <- left_join(gcounty, dis, by = c("fips" = fips_title))
  fill_vals <- gcounty_pop[col_val]

  # Plot
  if (legend_title == ""){
    legend_title <- col_val
  }

  if (percentile == FALSE){
    # names(gcounty_pop)[names(gcounty_pop) == col_val] <- "col_of_interest"
    plt <- ggplot(gcounty_pop) +
      geom_polygon(aes(long, lat, group = group, fill = get(col_val)),
                   color = "grey", size = 0.1, name="Percent Infected") +
      geom_polygon(aes(long, lat, group = group),
                   fill = NA, data = gusa, color = "lightgrey") +
      coord_map("bonne", parameters = 41.6) + ggthemes::theme_map()+
      scale_fill_gradient2()
       # scale_fill_gradient(low = "white", high = "red", na.value = "grey")
      # scale_fill_gradientn(colours = terrain.colors(10))
  }

  if (percentile == TRUE){
    plt <- ggplot(gcounty_pop) +
      geom_polygon(aes(long, lat, group = group, fill = pcls),
                   color = "grey", size = 0.1) +
      geom_polygon(aes(long, lat, group = group),
                   fill = NA, data = gusa, color = "lightgrey") +
      coord_map("bonne", parameters = 41.6) + ggthemes::theme_map() +
      scale_fill_manual(values = mycolors, na.value = "grey") +
      # scale_fill_brewer(palette = "viridis", na.value = "grey") +
      theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
            legend.background = element_rect(fill = NA), 
            legend.position = "left")
  }
  plt <- plt + labs(fill=legend_title) + ggtitle(graphic_title)
  plt
}
```

```{r}
subset_census
```

```{r}
subset_census['pct_infected'] <- subset_census['confirmed_cases']/subset_census['total_pop']
subset_census['pct_deaths'] <- subset_census['deaths']/subset_census['total_pop']
subset_census$county_fips_code <-as.integer(subset_census$county_fips_code)
subset_census
```
```{r}
plot_vs_county(subset_census, "pct_infected", legend_title = "Percent Infected")
plot_vs_county(subset_census, "pct_infected", percentile = TRUE, banks=11, 
               legend_title = "Percentile Infected",
               graphic_title = "Percentile of Percentage of People Infected by County")
plot_vs_county(subset_census, "pct_deaths", percentile = TRUE, banks=11, 
               legend_title = "Percentile Deaths",
               graphic_title = "Percentile of Percentage of Deaths by County")
```

```{r}
census_corr_cols <- c("deaths", "confirmed_cases", "median_income", "male_pop",
                      "female_pop", "total_pop", "median_age", "worked_at_home")
ggcorr(subset_census[census_corr_cols], low="red", mid="grey", high="blue", hjust= .75, size=3, 
       label = TRUE, label_size = 3, label_color = "white") + ggplot2::labs(title = "Pearson Correlation of Important Variables")
```
```{r}
global_mobility_report
```


```{r}
country_date_pct_change <- global_mobility_report %>% select(country_region_code
                                                             | contains("date") 
                                                             | contains("percent"))
country_date_pct_change
```
```{r}
coi_downsampled <- country_date_pct_change %>% filter(country_region_code %in% 
                                                        c("US", "CA", "NZ", "AE", "CN", "DE", "JP")) %>% 
  filter(weekdays(date) == "Monday") %>% group_by(country_region_code, date) %>% summarise_all(mean, na.rm = T) %>% arrange(country_region_code, date)
coi_downsampled
```



```{r}
interested_cols <- c("retail_and_recreation_percent_change_from_baseline",
                     "grocery_and_pharmacy_percent_change_from_baseline",
                     "parks_percent_change_from_baseline",
                     "transit_stations_percent_change_from_baseline",
                     "workplaces_percent_change_from_baseline",
                     "residential_percent_change_from_baseline")

col_labels <- c("Average Retail and Recreation Percent Change from Baseline",
                     "Average Grocery and Pharmacy Percent Change from Baseline",
                     "Average Parks Percent Change from Baseline",
                     "Average Transit Stations Percent Change from Baseline",
                     "Average Workplaces Percent Change from Baseline",
                     "Average Residential Percent Change from Baseline")


for (i in 1:length(col_labels)){
  print(i)
  plt <- ggplot(coi_downsampled,
       aes(x=date, y=get(interested_cols[i]), group=country_region_code,
                            color=country_region_code))+
  geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
  geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
  # geom_point(size=.5)+geom_line()+
  labs(y = col_labels[i], x = "Date", color = "Country")+
  theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
        axis.title.y = element_text(size = 8))
  save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
  ggsave(save_path)
  print(plt)
}
# ggplot(coi_downsampled,
#        aes(x=date, y=grocery_and_pharmacy_percent_change_from_baseline, group=country_region_code,
#                             color=country_region_code))+
#   geom_point(aes(y=rollmean(retail_and_recreation_percent_change_from_baseline, k=10, na.pad=TRUE)), size=.5)+
#   geom_line(aes(y=rollmean(retail_and_recreation_percent_change_from_baseline, k=10, na.pad=TRUE)))+
#   # geom_point(size=.5)+geom_line()+
#   labs(y = "Average Grocery and Pharmacy Percent Change from Baseline", x = "Date", color = "Country")+
#   theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
#         axis.title.y = element_text(size = 8))
#   # geom_line()
```

```{r}
tx_county_data <- tx_data %>% filter(county_name != "Statewide Unallocated")
tx_total_state_data <- tx_data %>% filter(county_name == "Statewide Unallocated")
tx_total_state_data$cumulative_deaths <- cumsum(tx_total_state_data$deaths)
tx_total_state_data$cumulative_cases <- cumsum(tx_total_state_data$confirmed_cases)
tx_county_data
tx_total_state_data
```

```{r}

```

```{r}
interested_cols <- c("confirmed_cases", "deaths", "cumulative_deaths", "cumulative_cases")
col_labels <- c("Confirmed Cases Per Day", "Deaths Per Day", "Total Deaths", "Total Cases")

for (i in 1:length(col_labels)){
  print(i)
  plt <- ggplot(tx_total_state_data,
       aes(x=date, y=get(interested_cols[i])))+
  # geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
  # geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
  # geom_point(size=.5)+geom_line()+
    geom_line()+
  labs(y = col_labels[i], x = "Date")
  # theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
  #       axis.title.y = element_text(size = 8))
  # save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
  # ggsave(save_path)
  print(plt)
}
```

Since the above doesn't really make sense (only 78 confirmed cases with over a thousand deaths), I am going to analyze on a per county basis and maybe that data will be more clear.

```{r}
tx_by_day_based_on_county <- tx_county_data %>% 
  select(date, confirmed_cases, deaths) %>%
  group_by(date) %>% 
  summarise_all(sum, na.rm = T) %>%
  arrange(date)

# interested_cols <- c("confirmed_cases", "deaths")
# col_labels <- c("Total Cases", "Total Deaths")
# 
# for (i in 1:length(col_labels)){
#   print(i)
#   title <- paste(c("Texas ", col_labels[i]), collapse = "")
#   plt <- ggplot(tx_by_day_based_on_county,
#        aes(x=date, y=get(interested_cols[i])))+
#   # geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
#   # geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
#   # geom_point(size=.5)+geom_line()+
#     geom_line(color)+
#   labs(y = col_labels[i], x = "Date", title = title)
#   # theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
#   #       axis.title.y = element_text(size = 8))
#   # save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
#   # ggsave(save_path)
#   print(plt)
# }

```
```{r}
tx_by_day_based_on_county
```



```{r}
library(scales)
# tx_by_day_based_on_county %>% pivot_longer(!date, names_to = "type", values_to = "count")
ggplot(tx_by_day_based_on_county %>% pivot_longer(!date, names_to = "type", values_to = "count"),
       aes(x=date, y=count, group=type, color = type))+
         geom_line()+
  scale_y_continuous(trans = "log10",
                     labels = trans_breaks('log10', math_format(10^.x)))+
  scale_color_manual(labels = c("Confirmed Cases", "Deaths"), values = c("confirmed_cases"="blue",
                                                                         "deaths"="red"))+
  labs(y = "Total Persons", x = "Date", color = "")+
  theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)))+
  ggtitle("Texas COVID-19 Cases")
  ggsave("./../imgs/texas_covid_cases_total.png")
  # scale_y_log10()
         # labs(y = "Total Persons", x = "Date"))

# )
# 
# plt <- ggplot(coi_downsampled,
#        aes(x=date, y=get(interested_cols[i]), group=country_region_code,
#                             color=country_region_code))+
#   geom_point(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)), size=.5)+
#   geom_line(aes(y=rollmean(get(interested_cols[i]), k=10, na.pad=TRUE)))+
#   # geom_point(size=.5)+geom_line()+
#   labs(y = col_labels[i], x = "Date", color = "Country")+
#   theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
#         axis.title.y = element_text(size = 8))
#   save_path <- paste(c("./../imgs/", interested_cols[i], ".png"), collapse = "")
#   ggsave(save_path)
#   print(plt)
```

```{r}
tx_county_data
```


```{r}
tx_state <- map_data("state") %>% subset(region == "texas")
tx_county_map_data <- map_data("county") %>% subset(region == "texas")

tx_state
tx_county_map_data

# Lowercase
tx_county_data$county_name <- tolower(tx_county_data$county_name)

# Remove ' county'
tx_county_data$county_name <- sub("\\s*county\\b.*", "", tx_county_data$county_name)
tx_county_data

# Join the data with state geo info
tx_geo_data <- left_join(tx_county_map_data, tx_county_data, by = c("subregion" = "county_name"))
tx_geo_data
```
```{r}
tx_geo_data$county_fips_code <- as.integer(tx_geo_data$county_fips_code)
tx_data_plot <- tx_geo_data %>% group_by()
plot_vs_county(tx_geo_data, "confirmed_cases")
# ggplot(tx_geo_data) +
#          coord_map() + 
#   ggthemes::theme_map()+
#   geom_polygon(aes(long, lat, group = subregion, fill = confirmed_cases))
# pc_cont_iowa <- geom_polygon(aes(long, lat, group = group, fill = pchange),
#                              color = "grey", size = 0.2)
```

```{r}
# dallas_data <- tx_county_data %>% filter(county_name == "dallas") 
dallas_data <- tx_county_data %>% filter(county_name == "dallas") %>% 
  select(date, confirmed_cases, deaths)

ggplot(dallas_data %>% pivot_longer(!date, names_to = "type", values_to = "count"),
       aes(x=date, y=count, group=type, color = type))+
         geom_line()+
  scale_y_continuous(trans = "log10", labels = trans_breaks("log10", math_format(10^.x)))+
  scale_y_log10(breaks=c(100, 300, 500, 1000, 3000, 5000, 10000, 30000, 50000, 100000, 300000),
                labels=c('100', '300', '500', '1000', '3000', '5000', '10000', '30000', '50000', '100000', '300000'))+
  scale_color_manual(labels = c("Confirmed Cases", "Deaths"), values = c("confirmed_cases"="blue",
                                                                         "deaths"="red"))+
  labs(y = "Total Persons", x = "Date", color = "")+
  theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)))+
  ggtitle("Dallas COVID-19 Cases")
  ggsave("./../imgs/dallas_covid_cases_total.png")
```


```{r}
subset_census
```















